This project analyzes shooting incidents in New York City, with a focus on which incidents were fatal, using two NYPD datasets. The historic dataset covers incidents from 2006 through 2024, and the 2025 year-to-date dataset provides the most recent records. Together, these data allow an examination of long-term patterns and current trends in fatal shootings across boroughs.
The analysis includes descriptive summaries of borough-level proportions and counts, followed by a logistic regression model that evaluates whether victim sex and age group are associated with the likelihood of death. These approaches provide a clear view of how demographic factors and geographic context relate to fatal shooting outcomes in New York City.
# Header image displayed at the top of the report.
knitr::include_graphics(path = "Figures/img NYC.jpg")
library(tidyverse)
library(tidycensus)
library(sf)
library(mapview)
library(knitr)
library(kableExtra)
library(broom)
# Register the Census API key for tidycensus without storing the secret in
# the source file. The key is read from the CENSUS_API_KEY environment
# variable (for example, defined in ~/.Renviron).
# SECURITY: a literal key used to be committed on this line; treat that key
# as leaked and request a replacement.
census_key <- Sys.getenv("CENSUS_API_KEY")
if (nzchar(census_key)) {
  census_api_key(census_key)
} else {
  # Do not register an empty key; just make the missing configuration visible.
  warning(
    "CENSUS_API_KEY is not set; tidycensus requests will fail until it is.",
    call. = FALSE
  )
}
library(tidyverse)
# Convert a raw STATISTICAL_MURDER_FLAG column to a logical vector.
#
# The flag is compared case-insensitively (after trimming whitespace) against
# a set of "true" spellings, so logical, "true"/"false", "Y"/"N" and 0/1
# encodings are all handled. Missing or empty values become FALSE, as in the
# previous `%in%` based coding. Any other value is also coded FALSE, but a
# warning lists it so a new encoding cannot silently zero out the fatal counts.
#
# NOTE(review): the rendered borough tables show identical murder counts for
# the historic file and the combined data while totals increase, i.e. every
# 2025 row was coded non-fatal. Presumably the year-to-date file encodes the
# flag as "Y"/"N"; confirm against the CSV.
#
# @param x Vector holding the raw flag values (any atomic type).
# @return Logical vector of the same length as `x`; TRUE marks a fatal shooting.
parse_murder_flag <- function(x) {
  flag <- toupper(trimws(as.character(x)))
  true_codes <- c("TRUE", "T", "Y", "YES", "1")
  false_codes <- c("FALSE", "F", "N", "NO", "0")

  unrecognised <- setdiff(
    unique(flag[!is.na(flag) & nzchar(flag)]),
    c(true_codes, false_codes)
  )
  if (length(unrecognised) > 0) {
    warning(
      "Unrecognised STATISTICAL_MURDER_FLAG value(s) treated as non-fatal: ",
      paste(unrecognised, collapse = ", "),
      call. = FALSE
    )
  }

  flag %in% true_codes
}

# Historic incidents, with the fatality flag normalised to logical.
dat1 <- read.csv("./Data Folder/Shooting_Historic.csv") |>
  mutate(STATISTICAL_MURDER_FLAG = parse_murder_flag(STATISTICAL_MURDER_FLAG))

# 2025 year-to-date incidents, normalised the same way so the two files agree.
dat2 <- read.csv("./Data Folder/Shooting_2025.csv") |>
  mutate(STATISTICAL_MURDER_FLAG = parse_murder_flag(STATISTICAL_MURDER_FLAG))

# Stack both files; `source` records which file each row came from.
dat_all <- bind_rows(
  dat1 |> mutate(source = "historic"),
  dat2 |> mutate(source = "y2025")
)
This section presents descriptive summaries of fatal shooting incidents across the five boroughs. It focuses on understanding basic patterns in proportions and counts before moving into spatial visualization and regression modeling.
This subsection summarizes the number of fatal shootings, the total number of shooting incidents, and the resulting proportion of fatal outcomes for each borough using the historic dataset from 2006 to 2024. It provides a clear comparison of how often shootings have resulted in death across boroughs over the long term.
# Per-borough summary of the historic file: number of fatal shootings, number
# of shootings overall, and the share of shootings that were fatal.
dat1_boro <- summarise(
  group_by(dat1, BORO),
  n_murder = sum(STATISTICAL_MURDER_FLAG),
  n_total = n(),
  prop_murder = n_murder / n_total,
  .groups = "drop"
)

# Display copy of the summary: counts get thousands separators and the
# proportion is fixed to two decimals before rendering the table.
dat1_boro |>
  mutate(
    across(c(n_murder, n_total), \(v) format(v, big.mark = ",")),
    prop_murder = sprintf("%.2f", prop_murder)
  ) |>
  kable(
    caption = "Fatal Shooting Proportions by Borough (2006 to 2024)",
    col.names = c(
      "Borough", "Murder cases", "Total shootings", "Murder proportion"
    ),
    align = c("l", "r", "r", "r")
  ) |>
  kable_styling(full_width = FALSE, position = "center")
| Borough | Murder cases | Total shootings | Murder proportion |
|---|---|---|---|
| BRONX | 1,728 | 8,834 | 0.20 |
| BROOKLYN | 2,277 | 11,685 | 0.19 |
| MANHATTAN | 719 | 3,977 | 0.18 |
| QUEENS | 871 | 4,426 | 0.20 |
| STATEN ISLAND | 170 | 822 | 0.21 |
This subsection presents the number of fatal shootings, the total number of shooting incidents, and the resulting proportion of fatal outcomes for each borough using the full dataset that includes all available years through 2025. This updated summary shows how borough-level fatality patterns change when recent incidents are incorporated.
# Per-borough summary over the combined data: fatal count, total count, and
# the fatal share.
dat_all_boro_prop <- summarise(
  group_by(dat_all, BORO),
  n_murder = sum(STATISTICAL_MURDER_FLAG),
  n_total = n(),
  prop_murder = n_murder / n_total,
  .groups = "drop"
)

# Counts-only table: the same aggregation without the proportion column, so it
# is taken from the summary above instead of aggregating a second time.
dat_all_boro_count <- dat_all_boro_prop |>
  select(BORO, n_murder, n_total)

# New York State county polygons, restricted to the five county names that
# the borough-to-county mapping in this report uses.
nyc_boro <- filter(
  tigris::counties(state = "NY", cb = TRUE),
  NAME %in% c("Bronx", "Kings", "New York", "Queens", "Richmond")
)
# Add a `county_name` column translating the upper-case borough label in
# `BORO` to the matching county name. Labels outside the five listed below
# (including missing values) yield NA.
#
# @param df Data frame with a `BORO` column.
# @return `df` with the character column `county_name` added.
boro_to_county <- function(df) {
  county_by_boro <- c(
    "MANHATTAN" = "New York",
    "BROOKLYN" = "Kings",
    "BRONX" = "Bronx",
    "QUEENS" = "Queens",
    "STATEN ISLAND" = "Richmond"
  )
  # Index by label (never by factor code) and drop the names left by indexing.
  mutate(df, county_name = unname(county_by_boro[as.character(BORO)]))
}
# Attach the county name used as the join key for the borough maps.
dat_all_boro_prop <- boro_to_county(dat_all_boro_prop)
dat_all_boro_count <- boro_to_county(dat_all_boro_count)

# All-years table: title-case the borough label, add thousands separators to
# the counts, and show the proportion with two decimals.
dat_all_boro_prop |>
  select(BORO, n_murder, n_total, prop_murder) |>
  mutate(
    BORO = BORO |>
      stringr::str_to_lower() |>
      stringr::str_to_title(),
    across(c(n_murder, n_total), \(v) format(v, big.mark = ",")),
    prop_murder = sprintf("%.2f", prop_murder)
  ) |>
  kable(
    caption = "Fatal Shooting Proportions by Borough (All Years Including 2025)",
    col.names = c(
      "Borough", "Murder cases", "Total shootings", "Murder proportion"
    ),
    align = c("l", "r", "r", "r")
  ) |>
  kable_styling(full_width = FALSE, position = "center")
| Borough | Murder cases | Total shootings | Murder proportion |
|---|---|---|---|
| Bronx | 1,728 | 9,132 | 0.19 |
| Brooklyn | 2,277 | 11,951 | 0.19 |
| Manhattan | 719 | 4,088 | 0.18 |
| Queens | 871 | 4,512 | 0.19 |
| Staten Island | 170 | 830 | 0.20 |
This section presents maps that visually display geographic differences in fatal shooting proportions and counts. The maps help reveal spatial patterns that are less obvious in tables or descriptive summaries.
This map shows the proportion of fatal shootings within each borough using historic data between 2006 and 2024, emphasizing long established spatial differences.
# County polygons for the five boroughs.
# NOTE(review): this repeats the download that already built `nyc_boro` in the
# borough-summary section; it is kept here so the map code runs unchanged.
nyc_boro <- filter(
  tigris::counties(state = "NY", cb = TRUE),
  NAME %in% c("Bronx", "Kings", "New York", "Queens", "Richmond")
)

# Add the county-name join key to the historic summary using the shared
# borough-to-county helper.
dat1_boro <- boro_to_county(dat1_boro)

# Attach the historic summary to the polygons by county name.
nyc_boro_map <- left_join(
  nyc_boro,
  dat1_boro,
  by = c("NAME" = "county_name")
)

# Interactive map shaded by the fatal share; the popup shows the borough and
# its proportion rounded to three decimals.
mapview(
  nyc_boro_map,
  zcol = "prop_murder",
  layer.name = "Proportion of Murder by Borough",
  popup = with(
    nyc_boro_map,
    paste0(
      "Borough: ", BORO,
      "<br>Murder proportion: ",
      round(prop_murder, 3)
    )
  )
)
This map incorporates all years of data to illustrate the most recent geographic distribution of fatal shooting proportions.
# Attach the all-years proportion summary to the borough polygons.
nyc_boro_map_prop <- left_join(
  nyc_boro,
  dat_all_boro_prop,
  by = c("NAME" = "county_name")
)

# Interactive map shaded by the fatal share across all years; the popup adds
# the total number of shootings for context.
mapview(
  nyc_boro_map_prop,
  zcol = "prop_murder",
  layer.name = "Proportion of Murder by Borough (all years)",
  popup = with(
    nyc_boro_map_prop,
    paste0(
      "Borough: ", BORO, "<br>",
      "Murder proportion: ", round(prop_murder, 3), "<br>",
      "Total shootings: ", n_total
    )
  )
)
This map displays borough-level case counts during the historic period and shows where fatal shootings were most concentrated.
library(tigris)
# Silence download progress output and cache downloaded shapefiles.
options(tigris_progress = FALSE, tigris_use_cache = TRUE)

# Rebuild the historic per-borough counts (no proportion column) and add the
# county name, stored here as COUNTY, as the join key.
dat1_boro <- dat1 |>
  group_by(BORO) |>
  summarise(
    n_murder = sum(STATISTICAL_MURDER_FLAG),
    n_total = n(),
    .groups = "drop"
  ) |>
  boro_to_county() |>
  rename(COUNTY = county_name)

# County polygons for the five boroughs with the historic counts attached.
nyc_boro_map <- counties(state = "NY", cb = TRUE) |>
  filter(NAME %in% c("Bronx", "Kings", "New York", "Queens", "Richmond")) |>
  left_join(dat1_boro, by = c("NAME" = "COUNTY"))

# Interactive map shaded by the number of fatal shootings; the popup lists
# the fatal count and the total count for each borough.
mapview(
  nyc_boro_map,
  zcol = "n_murder",
  layer.name = "Number of Murder Cases by Borough",
  popup = with(
    nyc_boro_map,
    paste0(
      "Borough: ", BORO, "<br>",
      "Number of murder cases: ", n_murder, "<br>",
      "Total shootings: ", n_total
    )
  )
)
This map includes all available data to present the current distribution of case counts across New York City.
# Attach the all-years counts to the borough polygons.
nyc_boro_map_count <- left_join(
  nyc_boro,
  dat_all_boro_count,
  by = c("NAME" = "county_name")
)

# Interactive map shaded by the number of fatal shootings across all years.
mapview(
  nyc_boro_map_count,
  zcol = "n_murder",
  layer.name = "Number of Murder Cases by Borough (all years)",
  popup = with(
    nyc_boro_map_count,
    paste0(
      "Borough: ", BORO, "<br>",
      "Number of murder cases: ", n_murder, "<br>",
      "Total shootings: ", n_total
    )
  )
)
This section examines individual level predictors of fatal outcomes using logistic regression. It builds on the descriptive results by quantifying how sex and age group relate to the probability of death.
A logistic regression model was used with fatal outcome as the dependent variable and victim sex and age group as predictors.
# Modelling data: keep victims recorded as M or F whose age group is not
# unknown, blank, or missing, then collapse age into three bands.
dat_all_stable <- dat_all |>
  filter(
    VIC_SEX %in% c("M", "F"),
    !VIC_AGE_GROUP %in% c("UNKNOWN", "", NA)
  ) |>
  mutate(
    # "adult" is listed first so it is the reference level in the model; age
    # labels outside the listed bands become NA.
    age_simple = factor(
      case_when(
        VIC_AGE_GROUP %in% c("<18", "18-24") ~ "young",
        VIC_AGE_GROUP %in% c("25-44", "45-64") ~ "adult",
        VIC_AGE_GROUP == "65+" ~ "old"
      ),
      levels = c("adult", "young", "old")
    ),
    STATISTICAL_MURDER_FLAG = as.numeric(STATISTICAL_MURDER_FLAG),
    VIC_SEX = factor(VIC_SEX)
  )

# Logistic regression of the fatal-outcome flag on victim sex and age band.
fit_stable <- glm(
  formula = STATISTICAL_MURDER_FLAG ~ VIC_SEX + age_simple,
  family = binomial(),
  data = dat_all_stable
)
# Odds-ratio table: exponentiated coefficients with confidence limits, using
# readable predictor labels and values rounded to two decimals.
or_table <- fit_stable |>
  tidy(exponentiate = TRUE, conf.int = TRUE) |>
  mutate(
    # Terms without an entry in the label lookup keep their original name.
    term = coalesce(
      unname(
        c(
          "(Intercept)" = "Intercept",
          "VIC_SEXM" = "Victim sex (male vs female)",
          "age_simpleyoung" = "Age group young vs adult",
          "age_simpleold" = "Age group old vs adult"
        )[term]
      ),
      term
    ),
    across(c(estimate, conf.low, conf.high), \(v) round(v, 2))
  ) |>
  select(
    Predictor = term,
    `Odds ratio` = estimate,
    `Lower 95 percent CI` = conf.low,
    `Upper 95 percent CI` = conf.high
  )

# Render the odds-ratio table.
kable(
  or_table,
  caption = "Adjusted odds ratios for fatal shooting outcomes",
  align = c("l", "r", "r", "r")
) |>
  kable_styling(full_width = FALSE, position = "center")
| Predictor | Odds ratio | Lower 95 percent CI | Upper 95 percent CI |
|---|---|---|---|
| Intercept | 0.28 | 0.26 | 0.31 |
| Victim sex (male vs female) | 0.97 | 0.88 | 1.07 |
| Age group young vs adult | 0.66 | 0.62 | 0.70 |
| Age group old vs adult | 1.51 | 1.14 | 1.99 |
The regression results indicate that age is a significant predictor of fatal outcomes. Compared with victims aged 25 to 64 (the reference group, labeled "adult" in the table), younger victims (under 25) have substantially lower odds of dying from a shooting, while older victims (65 and over) have higher odds of fatality. Victim sex is not significantly associated with the likelihood of death, as its 95 percent confidence interval includes 1. These patterns align with established findings in trauma epidemiology, where mortality risk tends to be lower among younger individuals and higher among older adults.
# Closing image displayed at the end of the report.
knitr::include_graphics(path = "Figures/img_statue.jpg")